require(tidyverse)

require(urltools)

# Load the bushfire and COVID .RData files (tweets, users, hashtags, URLs and
# media) in a fixed order. `envir = environment()` makes load() place the
# restored objects in this script's environment rather than in the frame of
# the lapply() call.
invisible(lapply(
  c(
    "data/bushfire_tweets.dt.RData",
    "data/covid_tweets.dt.RData",
    "data/bushfire_users.dt.RData",
    "data/covid_users.dt.RData",
    "data/bushfire_hashtags.dt.RData",
    "data/covid_hashtags.dt.RData",
    "data/bushfire_urls.dt.RData",
    "data/covid_urls.dt.RData",
    "data/bushfire_media.dt.RData",
    "data/covid_media.dt.RData"
  ),
  load,
  envir = environment()
))


# Add a `date` column to each tweet table by converting `created_at`
# with as.Date().
bushfire_tweets.dt$date <- with(bushfire_tweets.dt, as.Date(created_at))
covid_tweets.dt$date <- with(covid_tweets.dt, as.Date(created_at))

# clean_url starts as expanded_url and takes the value of unwounded_url on
# every row where that column is not NA.
# NOTE(review): Twitter's API calls this field `unwound_url`; confirm the
# loaded table really has an `unwounded_url` column -- a missing column would
# presumably make this replacement a silent no-op.
bushfire_urls.dt$clean_url <- replace(
  bushfire_urls.dt$expanded_url,
  !is.na(bushfire_urls.dt$unwounded_url),
  bushfire_urls.dt$unwounded_url[!is.na(bushfire_urls.dt$unwounded_url)]
)

# Rewrite a YouTube video URL to its short "https://youtu.be/<id>" form.
#
# Args:
#   str: a single URL (length-one character; NA is allowed).
#
# Returns:
#   "https://youtu.be/<id>" when `str` contains an https www./m. youtube.com
#   address AND an 11-character video id can be located, either as a `v=`
#   query parameter or directly after an /embed/, /shorts/, /live/ or /v/
#   path segment. In every other case `str` is returned unchanged.
ytUrlPrepare <- function(str) {
  # grepl() yields FALSE for NA, so missing URLs fall through untouched.
  if (!grepl("https://(www|m)\\.youtube\\.com", str)) {
    return(str)
  }

  # Anchor the id on the place it actually lives in the URL. Grabbing the
  # first 11-character run anywhere in the string picks up unrelated tokens
  # such as "search_query" or "player_embedded", and produces
  # "https://youtu.be/NA" when the URL has no such run at all.
  id_pattern <- "(?:[?&]v=|/(?:embed|shorts|live|v)/)([a-zA-Z0-9_-]{11})"
  hit <- regmatches(str, regexec(id_pattern, str, perl = TRUE))[[1]]

  # No video id (channel, playlist, search page, bare host, ...): keep the
  # URL as it is instead of fabricating a short link.
  if (length(hit) < 2) {
    return(str)
  }

  paste0("https://youtu.be/", hit[2])
}

# Pass every clean_url through ytUrlPrepare().
# vapply() with a character(1) template always yields a character vector
# (sapply() returns an empty list for a zero-row table), and
# USE.NAMES = FALSE stops each input URL being attached as an element name.
bushfire_urls.dt$clean_url <-
  vapply(as.character(bushfire_urls.dt$clean_url), ytUrlPrepare,
         character(1), USE.NAMES = FALSE)

# Drop everything from the first "?" or "#" onwards (query string and
# fragment) in a single pass.
bushfire_urls.dt$clean_url <-
  gsub("[?#].*", "", bushfire_urls.dt$clean_url)

# Domain of the cleaned URL, as returned by domain().
bushfire_urls.dt$domain <-
  domain(bushfire_urls.dt$clean_url)

# clean_url starts as expanded_url and takes the value of unwounded_url on
# every row where that column is not NA.
# NOTE(review): Twitter's API calls this field `unwound_url`; confirm the
# loaded table really has an `unwounded_url` column.
covid_urls.dt$clean_url <-
  covid_urls.dt$expanded_url

covid_urls.dt$clean_url[!is.na(covid_urls.dt$unwounded_url)] <-
  covid_urls.dt$unwounded_url[!is.na(covid_urls.dt$unwounded_url)]

# Pass every clean_url through ytUrlPrepare().
# vapply() with a character(1) template always yields a character vector
# (sapply() returns an empty list for a zero-row table), and
# USE.NAMES = FALSE stops each input URL being attached as an element name.
covid_urls.dt$clean_url <-
  vapply(as.character(covid_urls.dt$clean_url), ytUrlPrepare,
         character(1), USE.NAMES = FALSE)

# Drop everything from the first "?" or "#" onwards (query string and
# fragment) in a single pass.
covid_urls.dt$clean_url <-
  gsub("[?#].*", "", covid_urls.dt$clean_url)

# Domain of the cleaned URL, as returned by domain().
covid_urls.dt$domain <-
  domain(covid_urls.dt$clean_url)

# save(bushfire_urls.dt, covid_urls.dt, file = "urls.dt.RData")

# Load the two coded Reuters 2020 .RData files and the coded-users file,
# in that order, into the current environment.
invisible(lapply(
  c(
    "data/bushfire_coded_reuters_2020.RData",
    "data/covid_coded_reuters_2020.RData",
    "data/coded_users.RData"
  ),
  load,
  envir = environment()
))


# Predicted opinions ----

# Restore the two predicted-opinion .RData files and read the opinion/theme
# CSV into opinion_theme.wiki.
load(file = "data/predicted_opinions_twt.RData")
load(file = "data/predicted_opinions_twt_usr.RData")
opinion_theme.wiki <- read.csv(file = "data/opinion_theme.wiki.csv")

# Remove topic_label, de-duplicate the remaining rows, then add:
#   date    - as.Date() of the `time` column
#   dataset - "bushfire" for dates before 2020-03-02, "covid" otherwise
predicted_opinions_twt <- predicted_opinions_twt %>%
  dplyr::select(-topic_label) %>%
  dplyr::distinct() %>%
  dplyr::mutate(
    date = as.Date(time),
    dataset = ifelse(date < as.Date("2020-03-02"), "bushfire", "covid")
  )